# Now
! date
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit
from termcolor import colored, cprint
#thread
import threading
import concurrent.futures
import plotly.graph_objects as go
#import plotly.offline as offline
#offline.init_notebook_mode(connected=True)
from matplotlib.dates import DateFormatter
from matplotlib import rcParams
import matplotlib.pyplot as plt
%matplotlib inline
#%matplotlib notebook # If you switch from inline to notebook, you must restart the kernel!
# Configure matplotlib to use a Japanese-capable sans-serif font.
# The list is a preference order; 'Meiryo' is the correct spelling of the
# Windows font family (a misspelled name can never match an installed font).
rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = ['Hiragino Maru Gothic Pro', 'Yu Gothic', 'Meiryo', 'Takao', 'IPAexGothic', 'IPAPGothic', 'VL PGothic', 'Noto Sans CJK JP']
# Read the per-date CSV of newly confirmed cases.
df_raw = pd.read_csv('https://raw.githubusercontent.com/swsoyee/2019-ncov-japan/master/Data/byDate.csv')

# Drop the last row when every value after the first column is NaN.
last_row_is_empty = df_raw.iloc[-1:, 1:].isnull().all(axis=1).values[0]
if last_row_is_empty:
    df_raw = df_raw.drop(index=df_raw.index[-1])
    print('drop [.isnull().all()] record')

# Parse the date column and keep the dates as 'mm/dd/yy' labels.
df_raw['date'] = pd.to_datetime(df_raw['date'], format='%Y%m%d')
days = df_raw['date'].dt.strftime('%m/%d/%y').tolist()

# Treat missing values as zero.
df_raw = df_raw.fillna(0)

# Daily increases, plus a nationwide total that leaves out the
# cruise-ship and charter-flight columns.
inc = df_raw
region_columns = inc.drop(columns=['クルーズ船', 'チャーター便']).iloc[:, 1:]
inc['日本全体'] = region_columns.sum(axis=1)

# Cumulative totals: first column unchanged, running sum of the rest.
first_column = inc.iloc[:, 0:1]
running_totals = inc.iloc[:, 1:].cumsum()
tot = pd.concat([first_column, running_totals], axis=1)

# Reshape into the layout of the global dataset this code was adapted from:
# one row per region, four metadata columns, then one column per date.
tmp = tot.set_index('date').T.reset_index()
tmp.columns.name = None
tmp = tmp.rename(columns={'index': 'Country/Region'})
for placeholder in ('Province/State', 'Lat', 'Long'):
    tmp[placeholder] = np.nan
meta_part = tmp[['Province/State', 'Country/Region', 'Lat', 'Long']]
date_part = tmp.iloc[:, 1:-3]
df = pd.concat([meta_part, date_part], axis=1)
df.columns = list(df.columns[:4]) + days
df.head()

# Latest cumulative count per region, largest first, keeping regions with >= 100.
cases = df.iloc[:, [1, -1]].groupby('Country/Region').sum()
mostrecentdate = cases.columns[0]
print('\nTotal number of cases (in countries with at least 100 cases) as of', mostrecentdate)
cases = cases.sort_values(by=mostrecentdate, ascending=False)
cases = cases[cases[mostrecentdate] >= 100]
#cases = cases[cases[mostrecentdate] >= 20]
cases.head()
class CurveF(object):
    """Fit linear, logistic and exponential models to one (x, y) series.

    Every model listed in ``self.fitinfo`` is fitted independently with
    ``scipy.optimize.curve_fit``. Each entry holds the model function
    (``'func'``), the fitted parameters (``'popt'``), their covariance
    (``'pcov'``) and extra keyword arguments for ``curve_fit`` (``'para'``).
    ``'popt'`` and ``'pcov'`` stay ``None`` until a fit succeeds.
    """

    @staticmethod
    def liner(t, a, b):
        """Linear model: ``a * t + b``."""
        return a * t + b

    @staticmethod
    def logistic(t, a, b, c, d):
        """Logistic model: ``c + (d - c) / (1 + a * exp(-b * t))``."""
        return c + (d - c) / (1 + a * np.exp(-b * t))

    @staticmethod
    def exponential(t, a, b, c):
        """Exponential model: ``a * exp(b * t) + c``."""
        return a * np.exp(b * t) + c

    @staticmethod
    def _as_float_array(values):
        """Return ``values`` as a float ndarray; pass ``None`` and ndarrays through."""
        if values is None or isinstance(values, np.ndarray):
            return values
        # The builtin ``float`` replaces the removed ``np.float`` alias.
        return np.array(values, dtype=float)

    def __init__(self, x=None, y=None, maxfev=1000000):
        """Store the data and set up the table of models to fit.

        Args:
            x: Independent variable (sequence or ndarray), or ``None``.
            y: Dependent variable (sequence or ndarray), or ``None``.
            maxfev: Maximum number of function evaluations passed to
                ``curve_fit`` for every model.
        """
        self.x = None
        self.y = None
        self.maxfev = maxfev
        self.set_xy(x, y)
        self.fitinfo = {
            'liner': {'func': CurveF.liner, 'popt': None, 'pcov': None, 'para': dict()},
            'logistic': {'func': CurveF.logistic, 'popt': None, 'pcov': None, 'para': dict()},
            # The exponential fit is constrained by explicit parameter bounds.
            'exponential': {'func': CurveF.exponential, 'popt': None, 'pcov': None,
                            'para': dict(bounds=([0, 0, -100], [100, 0.9, 100]))},
        }

    def set_xy(self, x, y):
        """Replace the stored data; non-ndarray inputs become float ndarrays."""
        self.x = self._as_float_array(x)
        # Assign the converted y to ``self.y`` (it must not overwrite ``self.x``).
        self.y = self._as_float_array(y)

    def fit(self, verbose=None, idstr=None):
        """Fit every model in ``self.fitinfo`` to the stored data.

        A model whose fit raises is reported on stdout and left with
        ``popt``/``pcov`` set to ``None``; the remaining models are still fitted.

        Args:
            verbose: ``None`` prints no progress; ``1`` prints one start
                message; ``>= 2`` prints a start message per model. Any
                non-``None`` value prints a final "finish" message.
            idstr: Optional identifier shown in brackets in front of messages.
        """
        # Build the prefix unconditionally so every message, including the
        # failure report, can use it whatever the verbosity is.
        prefix = '' if idstr is None else f'[{idstr}]'
        for i, (key, val) in enumerate(self.fitinfo.items()):
            if verbose is not None:
                if verbose == 1 and i == 0:
                    print(f'{prefix} fitting... ')
                if verbose >= 2:
                    print(f'{prefix} fitting... {key}')
            popt = None
            pcov = None
            try:
                popt, pcov = curve_fit(val['func'], self.x, self.y,
                                       maxfev=self.maxfev, **val['para'])
            except Exception as exc:
                # Best effort: one failing model must not abort the others.
                print(f"{prefix} exception!! {key}: {exc}")
            val['popt'] = popt
            val['pcov'] = pcov
        if verbose is not None:
            print(f'{prefix} finish.')

    def calc(self, fitname, x):
        """Evaluate the fitted model ``fitname`` at ``x``.

        Returns:
            The model values, or an empty list when that model has no
            fitted parameters.
        """
        fdic = self.fitinfo[fitname]
        if fdic['popt'] is None:
            return []
        return fdic['func'](x, *fdic['popt'])
# Cumulative series for the row labelled '東京', indexed by day number.
#tokyo = df[ df['Country/Region'] == '日本全体']
tokyo = df[df['Country/Region'] == '東京']
y = tokyo.iloc[0, 4:].values
x = np.arange(len(y))

# Observation series for the row labelled '日本全体': cumulative totals and
# the day-to-day increase derived from them.
obs_df = df[df['Country/Region'] == '日本全体']
obsY = obs_df.iloc[0, 4:].values
obsInc = np.diff(np.insert(obsY, 0, 0))
tsize = len(obsY)
tstart = 20
days_num = tsize - tstart + 1
days_arange = np.arange(days_num)

# One entry per cut-off: the first `at` observations together with a CurveF
# that will be fitted to exactly that prefix.
datafits = []
for offset in days_arange:
    at = tstart + offset
    prefix_totals = obsY[:at]
    datafits.append({
        'at': at,
        'day_offset': offset,
        'data_Y': prefix_totals,
        'curve': CurveF(np.arange(len(prefix_totals)), y=prefix_totals),
        'data_Inc': obsInc[:at],
    })
# Fit every prefix, either sequentially (maxthreadd == 0) or on a thread pool.
maxthreadd = 10
if maxthreadd == 0:
    for i, one in enumerate(datafits):
        one['curve'].fit(verbose=2, idstr=f'{i:03d}')
else:
    print(f"thread pool start !! - num={maxthreadd}")
    futures = []
    # Leaving the `with` block waits for all submitted fits to complete.
    with concurrent.futures.ThreadPoolExecutor(max_workers=maxthreadd) as executor:
        count = 0
        for i, one in enumerate(datafits):
            futures.append(executor.submit(one['curve'].fit, verbose=0, idstr=f'{i:03d}'))
            count += 1
        print(f"thread pool {count} submmit")
    # An exception raised inside a worker is stored on its future; report it
    # here instead of letting it disappear unnoticed.
    for i, future in enumerate(futures):
        error = future.exception()
        if error is not None:
            print(f"[{i:03d}] fit failed: {error!r}")
print("main thread finish!!")
# Convert each fitted prefix into a list of plotly trace dicts.
# Fitted curves are drawn 14 days past the last observation of the prefix.
fit_line_styles = (
    ('liner', 'dash'),
    ('logistic', 'dash'),
    ('exponential', 'dot'),
)
data_series = []
for i, one in enumerate(datafits):
    at = one['at']
    at14 = at + 14
    observed_days = np.arange(at)
    forecast_days = np.arange(at14)
    # Observed cumulative total (markers) and observed daily increase (bars).
    ad = [
        {
            "x": observed_days,
            "y": one['data_Y'],
            "type": "scatter",
            "mode": "markers",
            "name": "Total"
        },
        {
            "x": observed_days,
            "y": one['data_Inc'],
            "type": "bar",
            "name": "Increase"
        },
    ]
    # One line trace per fitted model.
    ad += [
        {
            "x": forecast_days,
            "y": one['curve'].calc(model, forecast_days),
            "mode": "lines",
            "line": {"dash": dash},
            "name": model
        }
        for model, dash in fit_line_styles
    ]
    data_series.append(ad)
# Animated figure: observed totals/increases and fitted curves per cut-off day.

# Play / Pause buttons.
play_button = {
    "args": [None, {"frame": {"duration": 200, "redraw": False},
                    "fromcurrent": True,
                    "transition": {"duration": 100, "easing": "quadratic-in-out"}}],
    "label": "Play",
    "method": "animate"
}
pause_button = {
    "args": [[None], {"frame": {"duration": 0, "redraw": False},
                      "mode": "immediate",
                      "transition": {"duration": 0}}],
    "label": "Pause",
    "method": "animate"
}
button_menu = {
    "buttons": [play_button, pause_button],
    "direction": "left",
    "pad": {"r": 10, "t": 87},
    "showactive": False,
    "type": "buttons",
    "x": 0.1,
    "xanchor": "right",
    "y": 0,
    "yanchor": "top"
}

# Slider with one step per cut-off offset.
sliders_dict = {
    "active": 0,
    "yanchor": "top",
    "xanchor": "left",
    "currentvalue": {
        "font": {"size": 20},
        "prefix": "Day ",
        "visible": True,
        "xanchor": "right"
    },
    "transition": {"duration": 300, "easing": "cubic-in-out"},
    "pad": {"b": 10, "t": 50},
    "len": 0.9,
    "x": 0.1,
    "y": 0,
    "steps": [
        {
            "args": [
                [offset],
                {
                    "frame": {"duration": 300, "redraw": False},
                    "mode": "immediate",
                    "transition": {"duration": 300}
                }
            ],
            "label": str(offset),
            "method": "animate"
        }
        for offset in days_arange
    ]
}

# The initial view shows the last series; each frame shows one cut-off.
fig_dict = {
    "data": data_series[-1],
    "layout": {
        "updatemenus": [button_menu],
        "sliders": [sliders_dict]
    },
    "frames": [
        {"data": data_series[offset], "name": str(offset)}
        for offset in days_arange
    ]
}
fig = go.Figure(fig_dict)

# Show once with a linear y axis ...
kwparam = {'width': 950, 'height': 700, 'autosize': False}
fig.update_layout(title='罹患者数推移', xaxis_title='確認日', yaxis_title='人数', **kwparam)
fig.show()

# ... and once more with a logarithmic y axis.
kwparam = {'width': 950, 'height': 700, 'autosize': False}
fig.update_layout(title='罹患者数推移(Y軸対数)', xaxis_title='確認日', yaxis_title='人数', **kwparam)
fig.update_layout(yaxis={"type": "log"})
fig.show()
# Animated figure: cumulative total (x axis) against daily increase (y axis).

# Observation series for the row labelled '日本全体'.
obs_df = df[df['Country/Region'] == '日本全体']
obsY = obs_df.iloc[0, 4:].values
obsInc = np.diff(np.insert(obsY, 0, 0))
tsize = len(obs_df.iloc[0, 4:].values)
tstart = 20
days_num = tsize - tstart + 1
days_arange = np.arange(days_num)

# Figure skeleton with Play / Pause buttons.
fig_dict = {
    "data": [],
    "layout": {},
    "frames": []
}
fig_dict["layout"]["updatemenus"] = [
    {
        "buttons": [
            {
                "args": [None, {"frame": {"duration": 200, "redraw": False},
                                "fromcurrent": True,
                                "transition": {"duration": 100,
                                               "easing": "quadratic-in-out"}}],
                "label": "Play",
                "method": "animate"
            },
            {
                "args": [[None], {"frame": {"duration": 0, "redraw": False},
                                  "mode": "immediate",
                                  "transition": {"duration": 0}}],
                "label": "Pause",
                "method": "animate"
            }
        ],
        "direction": "left",
        "pad": {"r": 10, "t": 87},
        "showactive": False,
        "type": "buttons",
        "x": 0.1,
        "xanchor": "right",
        "y": 0,
        "yanchor": "top"
    }
]
sliders_dict = {
    "active": 0,
    "yanchor": "top",
    "xanchor": "left",
    "currentvalue": {
        "font": {"size": 20},
        "prefix": "Day ",
        "visible": True,
        "xanchor": "right"
    },
    "transition": {"duration": 300, "easing": "cubic-in-out"},
    "pad": {"b": 10, "t": 50},
    "len": 0.9,
    "x": 0.1,
    "y": 0,
    "steps": []
}

# One single-trace series per cut-off: the first `at` observations.
data_series = []
for offset in days_arange:
    at = tstart + offset
    ad = [
        {
            "x": obsY[0:at],
            "y": obsInc[0:at],
            "type": "scatter",
            "mode": "lines",
            "name": "Total"
        },
    ]
    data_series.append(ad)

# The initial view shows the last series.
fig_dict["data"] += data_series[-1]

# One frame and one slider step per series. Iterating over all `days_num`
# offsets includes the final cut-off, so the last entry of data_series also
# gets its frame and slider step.
for offset in range(days_num):
    frame = {"data": [], "name": str(offset)}
    frame["data"] += data_series[offset]
    fig_dict["frames"].append(frame)
    slider_step = {
        "args": [
            [offset],
            {"frame": {"duration": 300, "redraw": False},
             "mode": "immediate",
             "transition": {"duration": 300}}
        ],
        "label": str(offset),
        "method": "animate"
    }
    sliders_dict["steps"].append(slider_step)
fig_dict["layout"]["sliders"] = [sliders_dict]
fig = go.Figure(fig_dict)

# Show once with linear axes ...
kwparam = {'width': 950, 'height': 700, 'autosize': False}
fig.update_layout(title='# 罹患者数推移 (X軸=総数、Y軸=増加数)', xaxis_title='総数', yaxis_title='増加数', **kwparam)
#fig.update_layout(xaxis = { "type": "log"}, yaxis = { "type": "log"} )
#fig.update_layout(xaxis = { "type": "log"} )
fig.show()

# ... and once more with both axes logarithmic.
fig.update_layout(xaxis={"type": "log"}, yaxis={"type": "log"})
fig.update_layout(title='# 罹患者数推移 両軸対数 (X軸=総数、Y軸=増加数)')
fig.show()